Smiley face

Our Algorithm_BMLCS(Diagonal Extension and Domination, 2017)


Main features
Abstract of the paper [Chan 2017]

Given a pair of merging sequences A, B and a target sequence T, the merged longest common subsequence (MLCS) problem is to find out the longest common subsequence (LCS) between sequences E ( A, B ) and T, where E ( A, B ) is obtained from merging two subsequences of A and B. In this paper, we first propose an algorithm for solving the MLCS problem in O (n|Σ| + (r − L + 1)Lm) time and O (n|Σ| + Lm) space, where r and L denote the lengths of T and MLCS, respectively, and m and n denote the shorter and longer lengths of A and B , respectively. From the time complexity, it is clear that our algorithm is very efficient when T and E ( A, B ) are very similar. With slight modification, our algorithm can also solve another merged LCS problem variant, the block-merged LCS (BMLCS) problem, in O (n|Σ| + (r − L + 1)Lδ) time and O (n|Σ| + Lδ) space, where δ denotes the larger number of blocks of A and B. Experimental results show that our algorithms are faster than other previously published MLCS and BMLCS algorithms for sequences with high similarities. The source codes and datasets for experiments can be found on our web site http://par.cse.nsysu.edu.tw/~mlcs/ [20].

C++ source code
class Btriple {
public:
    int x;
    int y;
    int z;
    Btriple(int posA, int posB, int posT) { x = posA; y = posB; z = posT; }
};
int BMLCS_nakatzu(vector< int> T, vector< int> A, vector< int>B, int Tlen, int Alen, int Blen, int size, vector< int> EOBA, vector< int> EOBB) {
    vector< vector< int>> nextA(size + 1, vector< int>(Alen + 1, 0));
    vector< vector< int>> nextB(size + 1, vector< int>(Blen + 1, 0));
    vector< int> PosBlockA(Alen + 1, 0);
    vector< int> PosBlockB(Blen + 1, 0);
    vector< int> FEOBA(Alen + 1, 0);
    vector< int> FEOBB(Blen + 1, 0);
    int MaxLen = 0;
    vector< vector< Btriple>> QL(Tlen + 2, vector< Btriple>());
    vector< vector< Btriple>> tempQL(Tlen + 2, vector< Btriple>());
    int qlsize = 0;
    int locationA = 0;
    int locationB = 0;
    //construct array of nextA
    for (int i = 1; i <= size; ++i) {
        locationA = Alen + 1;
        for (int j = Alen; j > 0; --j) {
            if (j == Alen)
                nextA[i][j] = locationA;
            if (A[j] == i) {
                locationA = j;
                nextA[i][j - 1] = locationA;
            }
            else {
                nextA[i][j - 1] = locationA;
            }

        }
    }

    //construct array of nextB
    for (int i = 1; i <= size; ++i) {
        locationB = Blen + 1;
        for (int j = Blen; j > 0; --j) {
            if (j == Blen)
                nextB[i][j] = locationB;
            if (B[j] == i) {
                locationB = j;
                nextB[i][j - 1] = locationB;
            }
            else {
                nextB[i][j - 1] = locationB;
            }

        }
    }

    //construct PosA and PosB
    for (int i = 0, count = 0; i < Alen; ++i) {
        if (i <= EOBA[count]) {
            PosBlockA[i] = EOBA[count];
        }
        else {
            ++count;
            PosBlockA[i] = EOBA[count];
        }
    }
    PosBlockA[Alen] = EOBA[EOBA.size() - 1];
    for (int i = 0, count = 0; i < Blen; ++i) {
        if (i <= EOBB[count]) {
            PosBlockB[i] = EOBB[count];
        }
        else {
            ++count;
            PosBlockB[i] = EOBB[count];
        }
    }
    PosBlockB[Blen] = EOBB[EOBB.size() - 1];

    //construct FEOBA and FEOBB
    int sizeEOBA = EOBA.size(), sizeEOBB = EOBB.size();
    for (int i = 0; i < sizeEOBA; ++i) {
        FEOBA[EOBA[i]] = 1;
    }
    FEOBA[0] = 1;
    FEOBA[Alen] = 1;
    for (int i = 0; i < sizeEOBB; ++i) {
        FEOBB[EOBB[i]] = 1;
    }
    FEOBB[0] = 1;
    FEOBB[Blen] = 1;


    //main algorithm
    QL[0].push_back(Btriple(0, 0, 0));

    int PosA = 0;
    int PosB = 0;
    int PosT = 0;
    int LocalLen = 0;
    bool f = false;
    int TurnLen = 0;
    int tmpti = 0;
    for (int i = 1; i <= Tlen; ++i) {
        if (i > Tlen - MaxLen)
            break;
        //PosT = i;
        LocalLen = 0;
        tmpti = Tlen - i;
        for (int j = 0; j <= tmpti; ++j) {
            f = false;
            TurnLen = LocalLen;
            PosT = i + j;
            for (auto mytriple = QL[TurnLen].begin(); mytriple != QL[TurnLen].end(); ++mytriple) {
                //if (PosT > mytriple->z){
                PosA = nextA[T[PosT]][mytriple->x];
                PosB = nextB[T[PosT]][mytriple->y];
                if ((PosA <= Alen || PosB <= Blen) && f == false) {
                    ++LocalLen;
                    f = true;
                }
                //A[i] and B[j] are end of block.
                if (FEOBA[mytriple->x] == 1 && FEOBB[mytriple->y] == 1) {
                    if (PosA <= Alen) {
                        QL[LocalLen].push_back(Btriple(PosA, mytriple->y, PosT));
                    }
                    if (PosB <= Blen) {
                        QL[LocalLen].push_back(Btriple(mytriple->x, PosB, PosT));
                    }
                }
                //A[i] is EOB, but B[j] is not.
                else if (FEOBA[mytriple->x] == 1 && FEOBB[mytriple->y] == 0) {
                    if (PosB <= Blen) {
                        QL[LocalLen].push_back(Btriple(mytriple->x, PosB, PosT));
                    }
                    if (PosB > PosBlockB[mytriple->y] && PosA <= Alen) {
                        QL[LocalLen].push_back(Btriple(PosA, PosBlockB[mytriple->y], PosT));
                    }
                }
                //B[j] is EOB, but A[i] is not.
                else if (FEOBA[mytriple->x] == 0 && FEOBB[mytriple->y] == 1) {
                    if (PosA <= Alen) {
                        QL[LocalLen].push_back(Btriple(PosA, mytriple->y, PosT));
                    }
                    if (PosA > PosBlockA[mytriple->x] && PosB <= Blen) {
                        QL[LocalLen].push_back(Btriple(PosBlockA[mytriple->x], PosB, PosT));
                    }
                }
                //}
            }
            if (LocalLen > MaxLen)
            {
                MaxLen = LocalLen;
            }

            //2-D minima algorithm
            qlsize = QL[LocalLen].size();
            if (qlsize > 1 && f == true) {
                for (int i = 0; i < qlsize; ++i) {
                    Btriple temptri = QL[LocalLen][i];
                    if (!tempQL[temptri.x].empty() && temptri.y < tempQL[temptri.x][0].y) {
                        tempQL[temptri.x].erase(tempQL[temptri.x].begin());
                        tempQL[temptri.x].push_back(temptri);
                    }
                    else if (tempQL[temptri.x].empty() == true) {
                        tempQL[temptri.x].push_back(temptri);
                    }
                }
                QL[LocalLen].clear();
                int prevMaxY = Blen + 1;
                for (int i = 0; i <= Alen; ++i) {
                    if (!tempQL[i].empty()) {
                        if (tempQL[i][0].y < prevMaxY) {
                            prevMaxY = tempQL[i][0].y;
                            QL[LocalLen].push_back(tempQL[i][0]);
                        }
                        tempQL[i].clear();
                    }
                }
            }
            ++PosT;
            if (PosT > Tlen)
                break;
            if (f == false && QL[TurnLen + 1].empty() == true) {
                break;
            }
            if (f == false && QL[TurnLen + 1].empty() == false) {
                ++LocalLen;
            }

        }
    }


    return MaxLen;
}



The files
  All_BMLCS.cpp

Reference
Kuo-Tsung Tseng, De-Sheng Chan, Chang-Biau Yang, and Shou-Fu Lo, “Efficient Merged Longest Common Subsequence Algorithms for Similar Sequences,” Theoretical Computer Science 708 (2018) 75–90.